*1️⃣ Import Libraries*¶

In [68]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
In [69]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
In [70]:
import cv2
import random
import os
from PIL import Image
from tqdm import tqdm
import shutil
In [71]:
# Ignore all warnings
warnings.filterwarnings("ignore")

*2️⃣ Directories*¶

In [176]:
# Define paths
base_dir = "data"
raw_uppercase_dir = os.path.join(base_dir, "raw/UPPER_CASE")
raw_lowercase_dir = os.path.join(base_dir, "raw/lower_case")

balance_uppercase_dir = os.path.join(base_dir, "balance/UPPER_CASE")
balance_lowercase_dir = os.path.join(base_dir, "balance/lower_case")

aug_uppercase_dir = os.path.join(base_dir, "augmentation/UPPER_CASE")
aug_lowercase_dir = os.path.join(base_dir, "augmentation/lower_case")

split_dir = os.path.join(base_dir, "split")

split_train_dir = os.path.join(split_dir, "train")
split_test_dir = os.path.join(split_dir, "test")
split_val_dir = os.path.join(split_dir, "val")
In [178]:
# Create balance directories if they don't exist
os.makedirs(balance_uppercase_dir, exist_ok=True)
os.makedirs(balance_lowercase_dir, exist_ok=True)

os.makedirs(aug_uppercase_dir, exist_ok=True)
os.makedirs(aug_lowercase_dir, exist_ok=True)

os.makedirs(split_uppercase_dir, exist_ok=True)
os.makedirs(split_lowercase_dir, exist_ok=True)

os.makedirs(split_train_dir, exist_ok=True)
os.makedirs(split_test_dir, exist_ok=True)
os.makedirs(split_val_dir, exist_ok=True)

*3️⃣ Image Loading*¶

In [76]:
# Initialize lists to store images and labels
uppercase_images = []
uppercase_labels = []
lowercase_images = []
lowercase_labels = []
In [77]:
# Load uppercase images
for folder in os.listdir(raw_uppercase_dir):
    folder_path = os.path.join(raw_uppercase_dir, folder)
    if os.path.isdir(folder_path):
        for filename in tqdm(os.listdir(folder_path), desc=f"Loading {folder}...."):
            if filename.endswith(".png"):
                img_path = os.path.join(folder_path, filename)
                try:
                    img = Image.open(img_path)
                    img_array = np.array(img)
                    uppercase_images.append(img_array)
                    uppercase_labels.append(folder)
                except Exception as e:
                    print(f"Error loading image: {img_path}, Error: {e}")
Loading A....: 100%|████████████████████████████████████████████████████████████████| 150/150 [00:00<00:00, 212.86it/s]
Loading B....: 100%|████████████████████████████████████████████████████████████████| 140/140 [00:00<00:00, 156.88it/s]
Loading C....: 100%|████████████████████████████████████████████████████████████████| 101/101 [00:00<00:00, 141.12it/s]
Loading D....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 157.45it/s]
Loading E....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 157.18it/s]
Loading F....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 140.87it/s]
Loading G....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 120.82it/s]
Loading H....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 147.21it/s]
Loading I....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 161.30it/s]
Loading J....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 169.23it/s]
Loading K....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 146.08it/s]
Loading L....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 186.19it/s]
Loading M....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 171.19it/s]
Loading N....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 145.22it/s]
Loading O....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 133.47it/s]
Loading P....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 175.47it/s]
Loading Q....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 194.36it/s]
Loading R....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 156.48it/s]
Loading S....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 169.54it/s]
Loading T....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 131.47it/s]
Loading U....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 145.92it/s]
Loading V....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 158.70it/s]
Loading W....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 117.63it/s]
Loading X....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 114.26it/s]
Loading Y....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 119.39it/s]
Loading Z....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 154.08it/s]
In [78]:
# Load lowercase images
for folder in os.listdir(raw_lowercase_dir):
    folder_path = os.path.join(raw_lowercase_dir, folder)
    if os.path.isdir(folder_path):
        for filename in tqdm(os.listdir(folder_path), desc=f"Loading {folder}...."):
            if filename.endswith(".png"):
                img_path = os.path.join(folder_path, filename)
                try:
                    img = Image.open(img_path)
                    img_array = np.array(img)
                    lowercase_images.append(img_array)
                    lowercase_labels.append(folder)
                except Exception as e:
                    print(f"Error loading image: {img_path}, Error: {e}")
Loading a....: 100%|████████████████████████████████████████████████████████████████| 140/140 [00:00<00:00, 179.94it/s]
Loading b....: 100%|████████████████████████████████████████████████████████████████| 128/128 [00:00<00:00, 162.88it/s]
Loading c....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 138.64it/s]
Loading d....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 155.85it/s]
Loading e....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 113.88it/s]
Loading f....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 129.28it/s]
Loading g....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 108.20it/s]
Loading h....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 173.76it/s]
Loading i....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 143.81it/s]
Loading j....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 142.82it/s]
Loading k....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 142.69it/s]
Loading l....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 120.30it/s]
Loading m....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 135.95it/s]
Loading n....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 121.24it/s]
Loading o....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 148.97it/s]
Loading p....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 106.61it/s]
Loading q....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 125.27it/s]
Loading r....: 100%|████████████████████████████████████████████████████████████████| 200/200 [00:01<00:00, 113.81it/s]
Loading s....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 132.77it/s]
Loading t....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 137.37it/s]
Loading u....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 121.20it/s]
Loading v....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 129.57it/s]
Loading w....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 120.74it/s]
Loading x....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 125.55it/s]
Loading y....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 121.75it/s]
Loading z....: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 118.11it/s]
In [79]:
# Get image shapes and lengths
uppercase_image_shape = uppercase_images[0].shape
lowercase_image_shape = lowercase_images[0].shape
uppercase_image_count = len(uppercase_images)
lowercase_image_count = len(lowercase_images)
In [80]:
print(f"Uppercase Image Shape: {uppercase_image_shape}")
print(f"Lowercase Image Shape: {lowercase_image_shape}")
print(f"Number of Uppercase Images: {uppercase_image_count}")
print(f"Number of Lowercase Images: {lowercase_image_count}")
Uppercase Image Shape: (525, 483, 4)
Lowercase Image Shape: (525, 483, 4)
Number of Uppercase Images: 2600
Number of Lowercase Images: 2700
In [81]:
# Display sample uppercase images
plt.figure(figsize=(10, 5))
for i in range(10):
    plt.subplot(2, 5, i+1)
    plt.imshow(uppercase_images[i])
    plt.title(f"Uppercase: {uppercase_labels[i]}")
    plt.axis('off')
plt.suptitle("Sample Uppercase Images")
plt.show()
No description has been provided for this image
In [82]:
# Display sample lowercase images
plt.figure(figsize=(10, 5))
for i in range(10):
    plt.subplot(2, 5, i+1)
    plt.imshow(lowercase_images[i])
    plt.title(f"Lowercase: {lowercase_labels[i]}")
    plt.axis('off')
plt.suptitle("Sample Lowercase Images")
plt.show()
No description has been provided for this image
In [83]:
# Combine images and labels
all_images = uppercase_images + lowercase_images
all_labels = uppercase_labels + lowercase_labels

# Get unique labels and their counts
unique_labels, label_counts = np.unique(all_labels, return_counts=True)
In [84]:
lower_labels, lower_label_count = np.unique(lowercase_labels, return_counts=True)

# Create a DataFrame to store label counts
lower_label_counts = pd.DataFrame({'Label': lower_labels, 'Count': lower_label_count})

# Sort the DataFrame by Label (A-Z, a-z)
lower_label_counts = lower_label_counts.sort_values(by='Label')

print("Lower Label Counts:")
display(lower_label_counts)
Lower Label Counts:
Label Count
0 a 100
1 b 100
2 c 100
3 d 100
4 e 100
5 f 100
6 g 100
7 h 100
8 i 100
9 j 100
10 k 100
11 l 100
12 m 100
13 n 100
14 o 100
15 p 100
16 q 100
17 r 200
18 s 100
19 t 100
20 u 100
21 v 100
22 w 100
23 x 100
24 y 100
25 z 100
In [85]:
upper_labels, upper_label_count = np.unique(uppercase_labels, return_counts=True)

# Create a DataFrame to store label counts
upper_label_counts = pd.DataFrame({'Label': upper_labels, 'Count': upper_label_count})

# Sort the DataFrame by Label (A-Z, a-z)
upper_label_counts = upper_label_counts.sort_values(by='Label')

print("Upper Label Counts:")
display(upper_label_counts)
Upper Label Counts:
Label Count
0 A 100
1 B 100
2 C 100
3 D 100
4 E 100
5 F 100
6 G 100
7 H 100
8 I 100
9 J 100
10 K 100
11 L 100
12 M 100
13 N 100
14 O 100
15 P 100
16 Q 100
17 R 100
18 S 100
19 T 100
20 U 100
21 V 100
22 W 100
23 X 100
24 Y 100
25 Z 100
In [86]:
# Create a DataFrame to store label counts
label_counts_df = pd.DataFrame({'Label': unique_labels, 'Count': label_counts})

# Sort the DataFrame by Label (A-Z, a-z)
label_counts_df = label_counts_df.sort_values(by='Label')

print("Label Counts:")
display(label_counts_df)
Label Counts:
Label Count
0 A 100
1 B 100
2 C 100
3 D 100
4 E 100
5 F 100
6 G 100
7 H 100
8 I 100
9 J 100
10 K 100
11 L 100
12 M 100
13 N 100
14 O 100
15 P 100
16 Q 100
17 R 100
18 S 100
19 T 100
20 U 100
21 V 100
22 W 100
23 X 100
24 Y 100
25 Z 100
26 a 100
27 b 100
28 c 100
29 d 100
30 e 100
31 f 100
32 g 100
33 h 100
34 i 100
35 j 100
36 k 100
37 l 100
38 m 100
39 n 100
40 o 100
41 p 100
42 q 100
43 r 200
44 s 100
45 t 100
46 u 100
47 v 100
48 w 100
49 x 100
50 y 100
51 z 100
In [87]:
# Visualize the number of images per class
plt.figure(figsize=(12, 6))
plt.bar(lower_label_counts['Label'], lower_label_counts['Count'], color='skyblue')
plt.xlabel("Labels")
plt.ylabel("Number of Images")
plt.title("Number of Images per Class")
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
No description has been provided for this image
In [88]:
# Visualize the number of images per class
plt.figure(figsize=(12, 6))
plt.bar(upper_label_counts['Label'], upper_label_counts['Count'], color='skyblue')
plt.xlabel("Uppercase Labels")
plt.ylabel("Number of Images")
plt.title("Number of Images per Class")
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
No description has been provided for this image
In [89]:
# Visualize the number of images per class
plt.figure(figsize=(12, 6))
plt.bar(label_counts_df['Label'], label_counts_df['Count'], color='skyblue')
plt.xlabel("Labels")
plt.ylabel("Number of Images")
plt.title("Number of Images per Class")
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
No description has been provided for this image
In [90]:
# Pie chart for distribution of images per class
plt.figure(figsize=(18, 18))
plt.pie(upper_label_count, labels=upper_labels, autopct='%1.1f%%', startangle=90, colors=plt.cm.tab20.colors)
plt.title("Distribution of Images per Class")
plt.show()
No description has been provided for this image
In [91]:
# Pie chart for distribution of images per class
plt.figure(figsize=(18, 18))
plt.pie(lower_label_count, labels=lower_labels, autopct='%1.1f%%', startangle=90, colors=plt.cm.tab20.colors)
plt.title("Distribution of Images per Class")
plt.show()
No description has been provided for this image
In [92]:
# Pie chart for distribution of images per class
plt.figure(figsize=(18, 18))
plt.pie(label_counts, labels=unique_labels, autopct='%1.1f%%', startangle=140, colors=plt.cm.tab20.colors)
plt.title("Distribution of Images per Class")
plt.show()
No description has been provided for this image
In [93]:
# Histogram of pixel intensities for a sample uppercase image
sample_image = all_images[0]
plt.figure(figsize=(10, 5))
plt.hist(sample_image.ravel(), bins=256, range=(0, 256), color='black')
plt.title("Pixel Intensity Distribution (Sample Image)")
plt.xlabel("Pixel Intensity")
plt.ylabel("Frequency")
plt.grid(linestyle='--', alpha=0.7)
plt.show()
No description has been provided for this image
In [94]:
# Histogram of pixel intensities for a sample uppercase image
sample_uppercase_image = uppercase_images[0]
plt.figure(figsize=(10, 5))
plt.hist(sample_uppercase_image.ravel(), bins=256, range=(0, 256), color='black')
plt.title("Pixel Intensity Distribution (Uppercase Sample Image)")
plt.xlabel("Pixel Intensity")
plt.ylabel("Frequency")
plt.grid(linestyle='--', alpha=0.7)
plt.show()
No description has been provided for this image
In [95]:
# Histogram of pixel intensities for a sample lowercase image
sample_lowercase_image = lowercase_images[4]
plt.figure(figsize=(10, 5))
plt.hist(sample_lowercase_image.ravel(), bins=256, range=(0, 256), color='black')
plt.title("Pixel Intensity Distribution (Lowercase Sample Image)")
plt.xlabel("Pixel Intensity")
plt.ylabel("Frequency")
plt.grid(linestyle='--', alpha=0.7)
plt.show()
No description has been provided for this image
In [96]:
# Function to compute and plot mean and standard deviation of pixel intensities
def plot_pixel_statistics(images, labels, title):
    """
    Computes and plots the mean and standard deviation of pixel intensities for each label.
    """
    unique_labels = np.unique(labels)
    label_means = {label: [] for label in unique_labels}
    label_stds = {label: [] for label in unique_labels}

    for img, label in zip(images, labels):
        label_means[label].append(np.mean(img))
        label_stds[label].append(np.std(img))

    # Compute average mean and std for each label
    avg_means = {label: np.mean(label_means[label]) for label in unique_labels}
    avg_stds = {label: np.mean(label_stds[label]) for label in unique_labels}

    plt.figure(figsize=(12, 6))
    plt.bar(avg_means.keys(), avg_means.values(), color='blue', alpha=0.7, label='Mean')
    plt.bar(avg_stds.keys(), avg_stds.values(), color='red', alpha=0.7, label='Std Dev')
    plt.xlabel("Labels")
    plt.ylabel("Pixel Intensity")
    plt.title(title)
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()
In [97]:
# Plot pixel statistics for original uppercase and lowercase data
plot_pixel_statistics(uppercase_images, uppercase_labels, "Uppercase Pixel Statistics (Original Data)")
No description has been provided for this image
In [98]:
plot_pixel_statistics(lowercase_images, lowercase_labels, "Lowercase Pixel Statistics (Original Data)")
No description has been provided for this image
In [99]:
# Interactive label and image display
while True:
    label_input = input("Enter a label (A-Z, a-z) or '#' to quit: ")
    if label_input == '#':
        break
    try:
        label_index = all_labels.index(label_input)
        plt.imshow(all_images[label_index])
        plt.title(f"Label: {label_input}")
        plt.axis('off')
        plt.show()
    except ValueError:
        print("Invalid label. Please enter a valid label (A-Z, a-z) or '#' to quit.")
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

*4️⃣ Balancing Dataset*¶

In [101]:
# Function to balance images for each letter
def balance_images(src_dir, dest_dir, target_count=100):
    for letter in os.listdir(src_dir):
        letter_path = os.path.join(src_dir, letter)
        balanced_letter_path = os.path.join(dest_dir, letter)
        os.makedirs(balanced_letter_path, exist_ok=True)

        # List all images in the letter directory
        images = [img for img in tqdm(os.listdir(letter_path),desc=f"Balancing {letter}...") if img.endswith('.png')]

        # Randomly select images if more than target_count
        if len(images) > target_count:
            images_to_copy = random.sample(images, target_count)
        else:
            images_to_copy = images  # Keep all if less than or equal to target_count

        # Copy selected images to balanced directory
        for img in images_to_copy:
            src_img_path = os.path.join(letter_path, img)
            dest_img_path = os.path.join(balanced_letter_path, img)
            shutil.copy(src_img_path, dest_img_path)
    print("Dataset balancing complete!")
In [102]:
# Balance uppercase and lowercase images
balance_images(raw_uppercase_dir, balance_uppercase_dir, target_count=100)
balance_images(raw_lowercase_dir, balance_lowercase_dir, target_count=100)
Balancing A...: 100%|████████████████████████████████████████████████████████████████████████| 150/150 [00:00<?, ?it/s]
Balancing B...: 100%|████████████████████████████████████████████████████████████████████████| 140/140 [00:00<?, ?it/s]
Balancing C...: 100%|████████████████████████████████████████████████████████████████████████| 101/101 [00:00<?, ?it/s]
Balancing D...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing E...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing F...: 100%|█████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 49601.51it/s]
Balancing G...: 100%|█████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 40956.00it/s]
Balancing H...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing I...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing J...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing K...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing L...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing M...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing N...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing O...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing P...: 100%|█████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 65979.30it/s]
Balancing Q...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing R...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing S...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing T...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing U...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing V...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing W...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing X...: 100%|█████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 13120.32it/s]
Balancing Y...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing Z...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Dataset balancing complete!
Balancing a...: 100%|████████████████████████████████████████████████████████████████████████| 140/140 [00:00<?, ?it/s]
Balancing b...: 100%|████████████████████████████████████████████████████████████████████████| 128/128 [00:00<?, ?it/s]
Balancing c...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing d...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing e...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing f...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing g...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing h...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing i...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing j...: 100%|█████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 66177.09it/s]
Balancing k...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing l...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing m...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing n...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing o...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing p...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing q...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing r...: 100%|█████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 27999.36it/s]
Balancing s...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing t...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing u...: 100%|█████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 49176.97it/s]
Balancing v...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing w...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing x...: 100%|█████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 66239.80it/s]
Balancing y...: 100%|████████████████████████████████████████████████████████████████████████| 100/100 [00:00<?, ?it/s]
Balancing z...: 100%|█████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 13252.98it/s]
Dataset balancing complete!
In [103]:
# Function to display random images from each label
def display_random_images(data_dir, title):
    """
    Displays a random image from each label folder in the specified directory.
    """
    labels = sorted(os.listdir(data_dir))
    plt.figure(figsize=(15, 10))
    for i, label in enumerate(labels):
        label_dir = os.path.join(data_dir, label)
        if os.path.isdir(label_dir):
            images = os.listdir(label_dir)
            if images:
                img_path = os.path.join(label_dir, random.choice(images))
                img = Image.open(img_path)
                plt.subplot(5, 6, i+1)
                plt.imshow(img, cmap='gray')
                plt.title(label)
                plt.axis('off')
    plt.suptitle(title)
    plt.show()
In [104]:
# Display random images for uppercase
display_random_images(balance_uppercase_dir, "Random Uppercase Images (Balance)")
No description has been provided for this image
In [105]:
# Display random images for uppercase
display_random_images(balance_lowercase_dir, "Random Uppercase Images (Balance)")
No description has been provided for this image
In [106]:
# Initialize lists to store images and labels
balance_uppercase_img = []
balance_uppercase_labels = []
balance_lowercase_img = []
balance_lowercase_labels = []
In [107]:
# Load uppercase images
for folder in os.listdir(balance_uppercase_dir):
    folder_path = os.path.join(balance_uppercase_dir, folder)
    if os.path.isdir(folder_path):
        for filename in tqdm(os.listdir(folder_path), desc=f"Loading {folder}...."):
            if filename.endswith(".png"):
                img_path = os.path.join(folder_path, filename)
                try:
                    img = Image.open(img_path)
                    img_array = np.array(img)
                    balance_uppercase_img.append(img_array)
                    balance_uppercase_labels.append(folder)
                except Exception as e:
                    print(f"Error loading image: {img_path}, Error: {e}")
Loading A....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 41.88it/s]
Loading B....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 46.17it/s]
Loading C....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 61.25it/s]
Loading D....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 56.94it/s]
Loading E....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 50.95it/s]
Loading F....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 49.47it/s]
Loading G....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 53.07it/s]
Loading H....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 51.38it/s]
Loading I....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 51.72it/s]
Loading J....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 41.22it/s]
Loading K....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 45.11it/s]
Loading L....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 44.75it/s]
Loading M....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 50.18it/s]
Loading N....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 59.46it/s]
Loading O....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 51.68it/s]
Loading P....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 48.02it/s]
Loading Q....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 42.25it/s]
Loading R....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 41.40it/s]
Loading S....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 45.50it/s]
Loading T....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 59.07it/s]
Loading U....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 59.55it/s]
Loading V....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 57.93it/s]
Loading W....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 54.89it/s]
Loading X....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 52.49it/s]
Loading Y....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 55.50it/s]
Loading Z....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 56.25it/s]
In [108]:
# Load uppercase images
for folder in os.listdir(balance_lowercase_dir):
    folder_path = os.path.join(balance_lowercase_dir, folder)
    if os.path.isdir(folder_path):
        for filename in tqdm(os.listdir(folder_path), desc=f"Loading {folder}...."):
            if filename.endswith(".png"):
                img_path = os.path.join(folder_path, filename)
                try:
                    img = Image.open(img_path)
                    img_array = np.array(img)
                    balance_lowercase_img.append(img_array)
                    balance_lowercase_labels.append(folder)
                except Exception as e:
                    print(f"Error loading image: {img_path}, Error: {e}")
Loading a....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 61.04it/s]
Loading b....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 57.86it/s]
Loading c....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 54.73it/s]
Loading d....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 49.34it/s]
Loading e....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 49.59it/s]
Loading f....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 47.44it/s]
Loading g....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 46.57it/s]
Loading h....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 51.82it/s]
Loading i....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 66.47it/s]
Loading j....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 49.08it/s]
Loading k....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 40.57it/s]
Loading l....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 41.17it/s]
Loading m....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 52.15it/s]
Loading n....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 48.03it/s]
Loading o....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 54.65it/s]
Loading p....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 66.30it/s]
Loading q....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 47.37it/s]
Loading r....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 42.55it/s]
Loading s....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 54.47it/s]
Loading t....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 59.38it/s]
Loading u....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 62.78it/s]
Loading v....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 42.36it/s]
Loading w....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 55.71it/s]
Loading x....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 52.42it/s]
Loading y....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 53.74it/s]
Loading z....: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 55.51it/s]
In [109]:
# Get image shapes and lengths
balance_uppercase_img_shape = balance_uppercase_img[0].shape
balance_lowercase_img_shape = balance_lowercase_img[0].shape
balance_uppercase_img_count = len(balance_uppercase_img)
balance_lowercase_count = len(balance_lowercase_img)

print(f"Uppercase Image Shape: {balance_uppercase_img_shape}")
print(f"Lowercase Image Shape: {lowercase_image_shape}")
print(f"Number of Uppercase Images: {balance_uppercase_img_count}")
print(f"Number of Lowercase Images: {balance_lowercase_count}")
Uppercase Image Shape: (525, 483, 4)
Lowercase Image Shape: (525, 483, 4)
Number of Uppercase Images: 2600
Number of Lowercase Images: 2600
In [110]:
balance_img = balance_uppercase_img + balance_lowercase_img
balance_labels = balance_uppercase_labels + balance_lowercase_labels
In [111]:
# Get unique labels and their counts
blance_img_labels, balance_img_count = np.unique(balance_labels, return_counts=True)
balance_uppercase_img_labels, balance_uppercase_img_count = np.unique(balance_uppercase_labels, return_counts=True)
balance_lowercase_img_labels, balance_lowercase_img_count = np.unique(balance_lowercase_labels, return_counts=True)
In [112]:
# Create a DataFrame to store label counts
balance_upper_label_counts= pd.DataFrame({'Label': balance_uppercase_img_labels, 'Count': balance_uppercase_img_count})

# Sort the DataFrame by Label (A-Z, a-z)
balance_upper_label_counts= balance_upper_label_counts.sort_values(by='Label')

print("Label Counts:")
display(balance_upper_label_counts)
Label Counts:
Label Count
0 A 100
1 B 100
2 C 100
3 D 100
4 E 100
5 F 100
6 G 100
7 H 100
8 I 100
9 J 100
10 K 100
11 L 100
12 M 100
13 N 100
14 O 100
15 P 100
16 Q 100
17 R 100
18 S 100
19 T 100
20 U 100
21 V 100
22 W 100
23 X 100
24 Y 100
25 Z 100
In [113]:
# Visualize the number of images per class
plt.figure(figsize=(12, 6))
plt.bar(balance_upper_label_counts['Label'], balance_upper_label_counts['Count'], color='skyblue')
plt.xlabel("Labels")
plt.ylabel("Number of Images")
plt.title("Number of Images per Class")
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
No description has been provided for this image
In [114]:
# Pie chart for distribution of images per class
plt.figure(figsize=(18, 18))
plt.pie(balance_uppercase_img_count, labels=balance_uppercase_img_labels, autopct='%1.f%%', startangle=140, colors=plt.cm.tab20.colors)
plt.title("Distribution of Images per Class")
plt.show()
No description has been provided for this image
In [115]:
# Create a DataFrame to store label counts
balance_lower_label_counts= pd.DataFrame({'Label': balance_lowercase_img_labels, 'Count': balance_lowercase_img_count})

# Sort the DataFrame by Label (A-Z, a-z)
balance_lower_label_counts= balance_lower_label_counts.sort_values(by='Label')

print("Label Counts:")
display(balance_lower_label_counts)
Label Counts:
Label Count
0 a 100
1 b 100
2 c 100
3 d 100
4 e 100
5 f 100
6 g 100
7 h 100
8 i 100
9 j 100
10 k 100
11 l 100
12 m 100
13 n 100
14 o 100
15 p 100
16 q 100
17 r 100
18 s 100
19 t 100
20 u 100
21 v 100
22 w 100
23 x 100
24 y 100
25 z 100
In [116]:
# Visualize the number of images per class
plt.figure(figsize=(12, 6))
plt.bar(balance_lower_label_counts['Label'], balance_lower_label_counts['Count'], color='skyblue')
plt.xlabel("Labels")
plt.ylabel("Number of Images")
plt.title("Number of Images per Class")
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
No description has been provided for this image
In [117]:
# Pie chart for distribution of images per class
plt.figure(figsize=(18, 18))
plt.pie(balance_lowercase_img_count, labels=balance_lowercase_img_labels, autopct='%1.f%%', startangle=140, colors=plt.cm.tab20.colors)
plt.title("Distribution of Images per Class")
plt.show()
No description has been provided for this image
In [118]:
# Create a DataFrame to store label counts
balance_label_counts= pd.DataFrame({'Label': blance_img_labels, 'Count': balance_img_count})

# Sort the DataFrame by Label (A-Z, a-z)
balance_label_counts= balance_label_counts.sort_values(by='Label')

print("Label Counts:")
display(balance_label_counts)
Label Counts:
Label Count
0 A 100
1 B 100
2 C 100
3 D 100
4 E 100
5 F 100
6 G 100
7 H 100
8 I 100
9 J 100
10 K 100
11 L 100
12 M 100
13 N 100
14 O 100
15 P 100
16 Q 100
17 R 100
18 S 100
19 T 100
20 U 100
21 V 100
22 W 100
23 X 100
24 Y 100
25 Z 100
26 a 100
27 b 100
28 c 100
29 d 100
30 e 100
31 f 100
32 g 100
33 h 100
34 i 100
35 j 100
36 k 100
37 l 100
38 m 100
39 n 100
40 o 100
41 p 100
42 q 100
43 r 100
44 s 100
45 t 100
46 u 100
47 v 100
48 w 100
49 x 100
50 y 100
51 z 100
In [119]:
# Visualize the number of images per class
plt.figure(figsize=(12, 6))
plt.bar(balance_label_counts['Label'], balance_label_counts['Count'], color='skyblue')
plt.xlabel("Labels")
plt.ylabel("Number of Images")
plt.title("Number of Images per Class")
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
No description has been provided for this image
In [120]:
# Pie chart for distribution of images per class
plt.figure(figsize=(18, 18))
plt.pie(balance_img_count, labels=blance_img_labels, autopct='%1.f%%', startangle=140, colors=plt.cm.tab20.colors)
plt.title("Distribution of Images per Class")
plt.show()
No description has been provided for this image

*5️⃣ Preprocessing*¶

In [122]:
# Define preprocessing parameters
target_size = (28, 28)  # Resize images to 28x28
normalize = True        # Normalize pixel values to [0, 1]
augment = True          # Apply data augmentation
In [123]:
# Define augmentation parameters
rotation_range = (-10, 10)  # Rotate images by ±10 degrees
translation_range = (-5, 5) # Translate images by ±5 pixels
scale_range = (0.9, 1.1)    # Scale images by 90% to 110%
In [124]:
# Function to apply augmentation
def augment_image(img):
    """
    Applies random augmentation to an image.
    """
    # Rotation
    angle = random.uniform(rotation_range[0], rotation_range[1])
    rows, cols = img.shape[:2]
    M = cv2.getRotationMatrix2D((cols/2, rows/2), angle, 1)
    img = cv2.warpAffine(img, M, (cols, rows))

    # Translation
    tx = random.uniform(translation_range[0], translation_range[1])
    ty = random.uniform(translation_range[0], translation_range[1])
    M = np.float32([[1, 0, tx], [0, 1, ty]])
    img = cv2.warpAffine(img, M, (cols, rows))

    # Scaling
    scale = random.uniform(scale_range[0], scale_range[1])
    img = cv2.resize(img, None, fx=scale, fy=scale)
    img = cv2.resize(img, target_size)  # Resize back to target size

    return img
In [125]:
# Preprocess and save uppercase images
for folder in os.listdir(balance_uppercase_dir):
    folder_path = os.path.join(balance_uppercase_dir, folder)
    if os.path.isdir(folder_path):
        # Create corresponding folder in augmentation directory
        aug_folder_path = os.path.join(aug_uppercase_dir, folder)
        os.makedirs(aug_folder_path, exist_ok=True)

        for filename in tqdm(os.listdir(folder_path), desc=f"Processing {folder} Images....."):
            if filename.endswith(".png"):
                img_path = os.path.join(folder_path, filename)
                try:
                    # Load image
                    img = Image.open(img_path)
                    img = img.resize(target_size)  # Resize
                    img = img.convert('L')         # Convert to grayscale
                    img_array = np.array(img)

                    # Normalize
                    if normalize:
                        img_array = img_array / 255.0

                    # Augment
                    if augment:
                        img_array = augment_image(img_array)

                    # Save preprocessed image
                    aug_img_path = os.path.join(aug_folder_path, filename)
                    Image.fromarray((img_array * 255).astype(np.uint8)).save(aug_img_path)

                except Exception as e:
                    print(f"Error processing image: {img_path}, Error: {e}")
Processing A Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 74.33it/s]
Processing B Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 77.61it/s]
Processing C Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 70.95it/s]
Processing D Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 55.09it/s]
Processing E Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 61.72it/s]
Processing F Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 59.85it/s]
Processing G Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 66.86it/s]
Processing H Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 63.02it/s]
Processing I Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 62.36it/s]
Processing J Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 64.83it/s]
Processing K Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 70.24it/s]
Processing L Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 65.33it/s]
Processing M Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 96.00it/s]
Processing N Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 73.92it/s]
Processing O Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 76.32it/s]
Processing P Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 77.40it/s]
Processing Q Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 80.50it/s]
Processing R Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 89.89it/s]
Processing S Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 88.26it/s]
Processing T Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 94.77it/s]
Processing U Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 88.52it/s]
Processing V Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 84.52it/s]
Processing W Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 68.23it/s]
Processing X Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 73.32it/s]
Processing Y Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 71.15it/s]
Processing Z Images.....: 100%|██████████████████████████████████████████████████████| 100/100 [00:01<00:00, 90.57it/s]
In [126]:
# Preprocess and save lowercase images
for folder in os.listdir(balance_lowercase_dir):
    folder_path = os.path.join(balance_lowercase_dir, folder)
    if os.path.isdir(folder_path):
        # Create corresponding folder in augmentation directory
        aug_folder_path = os.path.join(aug_lowercase_dir, folder)
        os.makedirs(aug_folder_path, exist_ok=True)

        for filename in tqdm(os.listdir(folder_path), desc=f"Processing {folder} Images..."):
            if filename.endswith(".png"):
                img_path = os.path.join(folder_path, filename)
                try:
                    # Load image
                    img = Image.open(img_path)
                    img = img.resize(target_size)  # Resize
                    img = img.convert('L')         # Convert to grayscale
                    img_array = np.array(img)

                    # Normalize
                    if normalize:
                        img_array = img_array / 255.0

                    # Augment
                    if augment:
                        img_array = augment_image(img_array)

                    # Save preprocessed image
                    aug_img_path = os.path.join(aug_folder_path, filename)
                    Image.fromarray((img_array * 255).astype(np.uint8)).save(aug_img_path)

                except Exception as e:
                    print(f"Error processing image: {img_path}, Error: {e}")
Processing a Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 86.60it/s]
Processing b Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 74.42it/s]
Processing c Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 73.87it/s]
Processing d Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 73.01it/s]
Processing e Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 66.21it/s]
Processing f Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 74.06it/s]
Processing g Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 83.05it/s]
Processing h Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 96.19it/s]
Processing i Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 95.81it/s]
Processing j Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 85.03it/s]
Processing k Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 88.22it/s]
Processing l Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 92.75it/s]
Processing m Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 96.78it/s]
Processing n Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 92.59it/s]
Processing o Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 76.81it/s]
Processing p Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 80.93it/s]
Processing q Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 90.88it/s]
Processing r Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 82.87it/s]
Processing s Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 86.47it/s]
Processing t Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 67.30it/s]
Processing u Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 75.80it/s]
Processing v Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 92.03it/s]
Processing w Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 73.30it/s]
Processing x Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 69.31it/s]
Processing y Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 70.82it/s]
Processing z Images...: 100%|████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 72.02it/s]
In [127]:
# Function to display random images from each label
def display_random_images(data_dir, title):
    """
    Displays a random image from each label folder in the specified directory.
    """
    labels = sorted(os.listdir(data_dir))
    plt.figure(figsize=(15, 10))
    for i, label in enumerate(labels):
        label_dir = os.path.join(data_dir, label)
        if os.path.isdir(label_dir):
            images = os.listdir(label_dir)
            if images:
                img_path = os.path.join(label_dir, random.choice(images))
                img = Image.open(img_path)
                plt.subplot(5, 6, i+1)
                plt.imshow(img, cmap='gray')
                plt.title(label)
                plt.axis('off')
    plt.suptitle(title)
    plt.show()
In [128]:
# Display random images for uppercase
display_random_images(aug_uppercase_dir, "Random Uppercase Images (Augmented)")
No description has been provided for this image
In [129]:
# Display random images for lowercase
display_random_images(aug_lowercase_dir, "Random Lowercase Images (Augmented)")
No description has been provided for this image
In [130]:
# Function to display user-defined label and number of images
def display_user_defined_images(raw_dir, aug_dir, label, num_images):
    """
    Displays a user-defined number of images for a specific label from both raw and augmented directories.
    """
    raw_label_dir = os.path.join(raw_dir, label)
    aug_label_dir = os.path.join(aug_dir, label)

    if os.path.isdir(raw_label_dir) and os.path.isdir(aug_label_dir):
        raw_images = os.listdir(raw_label_dir)[:num_images]
        aug_images = os.listdir(aug_label_dir)[:num_images]

        plt.figure(figsize=(15, 5))
        for i, (raw_img_name, aug_img_name) in enumerate(zip(raw_images, aug_images)):
            # Display original image
            raw_img_path = os.path.join(raw_label_dir, raw_img_name)
            raw_img = Image.open(raw_img_path)
            plt.subplot(2, num_images, i+1)
            plt.imshow(raw_img, cmap='gray')
            plt.title(f"Original: {label}")
            plt.axis('off')

            # Display preprocessed image
            aug_img_path = os.path.join(aug_label_dir, aug_img_name)
            aug_img = Image.open(aug_img_path)
            plt.subplot(2, num_images, num_images + i+1)
            plt.imshow(aug_img, cmap='gray')
            plt.title(f"Preprocessed: {label}")
            plt.axis('off')
        plt.suptitle(f"Original vs Preprocessed Images for Label: {label}")
        plt.show()
    else:
        print(f"Label '{label}' not found in both directories.")
In [131]:
# User input for label and number of images
label_input = input("Enter a label (A-Z): ")
num_images_input = int(input("Enter the number of images to display: "))
display_user_defined_images(balance_uppercase_dir, aug_uppercase_dir, label_input.upper(), num_images_input)
No description has been provided for this image
In [132]:
# User input for label and number of images
label_input = input("Enter a label (a-z): ")
num_images_input = int(input("Enter the number of images to display: "))
display_user_defined_images(balance_lowercase_dir, aug_lowercase_dir, label_input.lower(), num_images_input)
No description has been provided for this image
In [133]:
# Size comparison of original and processed images
def compare_image_sizes(raw_dir, aug_dir, label):
    """
    Compares the sizes of original and preprocessed images for a specific label.
    """
    raw_label_dir = os.path.join(raw_dir, label)
    aug_label_dir = os.path.join(aug_dir, label)

    if os.path.isdir(raw_label_dir) and os.path.isdir(aug_label_dir):
        raw_img_path = os.path.join(raw_label_dir, os.listdir(raw_label_dir)[5])
        aug_img_path = os.path.join(aug_label_dir, os.listdir(aug_label_dir)[5])

        raw_img = Image.open(raw_img_path)
        aug_img = Image.open(aug_img_path)

        print(f"Original Image Size: {raw_img.size}")
        print(f"Preprocessed Image Size: {aug_img.size}")

        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)
        plt.imshow(raw_img, cmap='gray')
        plt.title(f"Original: {label}")
        plt.axis('off')

        plt.subplot(1, 2, 2)
        plt.imshow(aug_img, cmap='gray')
        plt.title(f"Preprocessed: {label}")
        plt.axis('off')
        plt.suptitle(f"Size Comparison for Label: {label}")
        plt.show()
    else:
        print(f"Label '{label}' not found in both directories.")
In [134]:
# Compare sizes for a sample label
compare_image_sizes(balance_uppercase_dir, aug_uppercase_dir, "A")
Original Image Size: (483, 525)
Preprocessed Image Size: (28, 28)
No description has been provided for this image
In [135]:
# Compare sizes for a sample label
compare_image_sizes(balance_lowercase_dir, aug_lowercase_dir, "c")
Original Image Size: (483, 525)
Preprocessed Image Size: (28, 28)
No description has been provided for this image
In [136]:
# Histogram of pixel intensities for original and preprocessed images
def plot_pixel_histogram(raw_dir, aug_dir, label):
    """
    Plots histograms of pixel intensities for original and preprocessed images.
    """
    raw_label_dir = os.path.join(raw_dir, label)
    aug_label_dir = os.path.join(aug_dir, label)

    if os.path.isdir(raw_label_dir) and os.path.isdir(aug_label_dir):
        raw_img_path = os.path.join(raw_label_dir, os.listdir(raw_label_dir)[0])
        aug_img_path = os.path.join(aug_label_dir, os.listdir(aug_label_dir)[0])

        raw_img = np.array(Image.open(raw_img_path))
        aug_img = np.array(Image.open(aug_img_path))

        plt.figure(figsize=(15, 4))
        plt.subplot(1, 2, 1)
        plt.hist(raw_img.ravel(), bins=256, range=(0, 256), color='blue', alpha=0.7)
        plt.title(f"Original: {label}")
        plt.xlabel("Pixel Intensity")
        plt.ylabel("Frequency")

        plt.subplot(1, 2, 2)
        plt.hist(aug_img.ravel(), bins=256, range=(0, 256), color='green', alpha=0.7)
        plt.title(f"Preprocessed: {label}")
        plt.xlabel("Pixel Intensity")
        plt.ylabel("Frequency")
        plt.suptitle(f"Pixel Intensity Histograms for Label: {label}")
        plt.show()
    else:
        print(f"Label '{label}' not found in both directories.")
In [137]:
# Plot histograms for a sample label
plot_pixel_histogram(balance_uppercase_dir, aug_uppercase_dir, "A")
No description has been provided for this image
In [138]:
# Plot histograms for a sample label
plot_pixel_histogram(balance_lowercase_dir, aug_lowercase_dir, "a")
No description has been provided for this image
In [139]:
# Function to display a grid of images for a specific label
def display_label_image_grid(data_dir, label, num_images=25):
    """
    Displays a grid of images for a specific label.
    """
    label_dir = os.path.join(data_dir, label)
    if os.path.isdir(label_dir):
        images = os.listdir(label_dir)
        if images:
            plt.figure(figsize=(10, 10))
            for i in range(min(num_images, len(images))):
                img_path = os.path.join(label_dir, images[i])
                img = Image.open(img_path)
                plt.subplot(5, 5, i+1)
                plt.imshow(img, cmap='gray')
                plt.title(f"Label: {label}")
                plt.axis('off')
            plt.suptitle(f"Image Grid for Label: {label}")
            plt.show()
        else:
            print(f"No images found for label: {label}")
    else:
        print(f"Label '{label}' not found in directory.")
In [140]:
# Display image grid for a specific label
display_label_image_grid(aug_uppercase_dir, "D")
No description has been provided for this image
In [141]:
# Display image grid for a specific label
display_label_image_grid(aug_lowercase_dir, "z")
No description has been provided for this image
In [142]:
# Function to visualize augmentation effects
def visualize_augmentation_effects(data_dir, label):
    """
    Visualizes the effects of augmentation on a sample image.
    """
    label_dir = os.path.join(data_dir, label)
    if os.path.isdir(label_dir):
        images = os.listdir(label_dir)
        if images:
            img_path = os.path.join(label_dir, images[0])
            img = Image.open(img_path)
            img_array = np.array(img)

            # Apply augmentation
            augmented_img = augment_image(img_array)

            plt.figure(figsize=(10, 5))
            plt.subplot(1, 2, 1)
            plt.imshow(img, cmap='gray')
            plt.title("Processed Image")
            plt.axis('off')

            plt.subplot(1, 2, 2)
            plt.imshow(augmented_img, cmap='gray')
            plt.title("Augmented Image")
            plt.axis('off')
            plt.suptitle(f"Augmentation Effects for Label: {label}")
            plt.show()
        else:
            print(f"No images found for label: {label}")
    else:
        print(f"Label '{label}' not found in directory.")
In [143]:
# Visualize augmentation effects for a sample label
visualize_augmentation_effects(aug_uppercase_dir, "E")
No description has been provided for this image
In [144]:
# Visualize augmentation effects for a sample label
visualize_augmentation_effects(aug_lowercase_dir, "f")
No description has been provided for this image
In [145]:
# Function to plot class distribution
def plot_class_distribution(data_dir, title):
    """
    Plots the distribution of images across labels.
    """
    label_counts = {}
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        if os.path.isdir(label_dir):
            label_counts[label] = len(os.listdir(label_dir))

    plt.figure(figsize=(12, 6))
    plt.bar(label_counts.keys(), label_counts.values(), color='skyblue')
    plt.xlabel("Labels")
    plt.ylabel("Number of Images")
    plt.title(title)
    plt.xticks(rotation=45)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()
In [146]:
# Plot class distribution for uppercase and lowercase
plot_class_distribution(aug_uppercase_dir, "Uppercase Augmented Image Distribution")
No description has been provided for this image
In [147]:
plot_class_distribution(aug_lowercase_dir, "Lowercase Augmented Image Distribution")
No description has been provided for this image
In [148]:
# Function to compute and plot mean and standard deviation of pixel intensities
def plot_pixel_statistics(data_dir, title):
    """
    Computes and plots the mean and standard deviation of pixel intensities for each label.
    """
    label_means = {}
    label_stds = {}
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        if os.path.isdir(label_dir):
            pixel_values = []
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                img = Image.open(img_path)
                img_array = np.array(img)
                pixel_values.extend(img_array.flatten())
            label_means[label] = np.mean(pixel_values)
            label_stds[label] = np.std(pixel_values)

    plt.figure(figsize=(12, 6))
    plt.bar(label_means.keys(), label_means.values(), color='blue', alpha=0.7, label='Mean')
    plt.bar(label_stds.keys(), label_stds.values(), color='red', alpha=0.7, label='Std Dev')
    plt.xlabel("Labels")
    plt.ylabel("Pixel Intensity")
    plt.title(title)
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()
In [149]:
# Plot pixel statistics for uppercase and lowercase
plot_pixel_statistics(aug_uppercase_dir, "Uppercase Pixel Statistics")
No description has been provided for this image
In [150]:
plot_pixel_statistics(aug_lowercase_dir, "Lowercase Pixel Statistics")
No description has been provided for this image
In [151]:
# Function to visualize t-SNE or PCA
def visualize_dimensionality_reduction(data_dir, title, method='tsne'):
    """
    Visualizes the high-dimensional image data in 2D using t-SNE or PCA.
    """
    images = []
    labels = []
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        if os.path.isdir(label_dir):
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                img = Image.open(img_path)
                img_array = np.array(img).flatten()
                images.append(img_array)
                labels.append(label)

    images = np.array(images)
    labels = np.array(labels)

    if method == 'tsne':
        tsne = TSNE(n_components=2, random_state=42)
        reduced_data = tsne.fit_transform(images)
    elif method == 'pca':
        pca = PCA(n_components=2)
        reduced_data = pca.fit_transform(images)

    plt.figure(figsize=(10, 8))
    scatter = plt.scatter(reduced_data[:, 0], reduced_data[:, 1], cmap='tab20', alpha=0.7)
    plt.xlabel("Component 1")
    plt.ylabel("Component 2")
    plt.title(title)
    plt.legend(*scatter.legend_elements(), title="Labels")
    plt.show()
In [152]:
# Visualize t-SNE for uppercase and lowercase
visualize_dimensionality_reduction(aug_uppercase_dir, "t-SNE Visualization for Uppercase", method='tsne')
No description has been provided for this image
In [153]:
visualize_dimensionality_reduction(aug_lowercase_dir, "t-SNE Visualization for Lowercase", method='tsne')
No description has been provided for this image
In [154]:
visualize_dimensionality_reduction(aug_uppercase_dir, "PCA Visualization for Uppercase", method='pca')
No description has been provided for this image
In [155]:
visualize_dimensionality_reduction(aug_lowercase_dir, "PCA Visualization for Uppercase", method='pca')
No description has been provided for this image

*6️⃣ Image splliting*¶

In [190]:
for split in ["train", "test", "val"]:
    for case in ["uppercase", "lowercase"]:
        char_range = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" if case == "uppercase" else "abcdefghijklmnopqrstuvwxyz"
        for char in char_range:
            target_dir = os.path.join(split_dir, split, case, char)
            os.makedirs(target_dir, exist_ok=True)  # Create if it doesn't exist

            # Clear existing files (if any)
            for filename in os.listdir(target_dir):
                file_path = os.path.join(target_dir, filename)
                try:
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                    elif os.path.isdir(file_path): # Handle subdirectories if any
                        shutil.rmtree(file_path)
                except Exception as e: # Catch potential errors and print them
                    print(f"Error deleting {file_path}: {e}")
In [192]:
# Function to load and limit images to 100 per label
def load_and_limit_images(data_dir, label, limit=100):
    """
    Loads images for a specific label and limits the number to a specified limit.
    """
    label_dir = os.path.join(data_dir, label)
    if os.path.isdir(label_dir):
        images = [img for img in os.listdir(label_dir) if img.endswith(".png")]
        if len(images) > limit:
            images = random.sample(images, limit)
        return images
    return []
In [194]:
def split_and_copy_data(source_dir, dest_dir, split_ratio=(0.7, 0.15, 0.15)):
    for case in ["uppercase", "lowercase"]:
        char_range = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" if case == "uppercase" else "abcdefghijklmnopqrstuvwxyz"
        for char in tqdm(char_range, desc=f"Splitting {case}"):
            images = load_and_limit_images(source_dir if case == "uppercase" else source_dir.replace("UPPER","lower"), char) # Dynamic source dir
            if not images: # Skip if no images are found for the character.
                continue
            random.shuffle(images)

            train_len = int(len(images) * split_ratio[0])
            val_len = int(len(images) * split_ratio[1])

            train_images = images[:train_len]
            val_images = images[train_len:train_len + val_len]
            test_images = images[train_len + val_len:]

            for split, image_list in [("train", train_images), ("val", val_images), ("test", test_images)]:
                target_dir = os.path.join(dest_dir, split, case, char)
                for image_name in tqdm(image_list, desc=f"Copying {split} images for {char}", leave=False):
                    source_path = os.path.join(source_dir if case == "uppercase" else source_dir.replace("UPPER","lower"), char, image_name)
                    target_path = os.path.join(target_dir, image_name)
                    shutil.copy(source_path, target_path)
In [196]:
# Example usage (T-2 split):
split_and_copy_data(aug_uppercase_dir, split_dir)
Splitting uppercase:   0%|                                                                      | 0/26 [00:00<?, ?it/s]
Copying train images for A:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for A:   4%|██▎                                                    | 3/70 [00:00<00:02, 26.78it/s]
Copying train images for A:  51%|███████████████████████████▎                         | 36/70 [00:00<00:00, 193.78it/s]
                                                                                                                       
Copying val images for A:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for A:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:   4%|██▍                                                           | 1/26 [00:00<00:11,  2.26it/s]
Copying train images for B:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for B:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for B:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:   8%|████▊                                                         | 2/26 [00:00<00:06,  3.61it/s]
Copying train images for C:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for C:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for C:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  12%|███████▏                                                      | 3/26 [00:00<00:05,  4.50it/s]
Copying train images for D:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for D:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for D:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  15%|█████████▌                                                    | 4/26 [00:00<00:04,  5.20it/s]
Copying train images for E:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for E:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for E:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  19%|███████████▉                                                  | 5/26 [00:01<00:03,  5.78it/s]
Copying train images for F:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for F:  93%|█████████████████████████████████████████████████▏   | 65/70 [00:00<00:00, 646.03it/s]
                                                                                                                       
Copying val images for F:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for F:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  23%|██████████████▎                                               | 6/26 [00:01<00:03,  5.81it/s]
Copying train images for G:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for G:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for G:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  27%|████████████████▋                                             | 7/26 [00:01<00:03,  6.24it/s]
Copying train images for H:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for H:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for H:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  31%|███████████████████                                           | 8/26 [00:01<00:02,  6.64it/s]
Copying train images for I:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for I:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for I:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  35%|█████████████████████▍                                        | 9/26 [00:01<00:02,  6.88it/s]
Copying train images for J:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for J:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for J:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  38%|███████████████████████▍                                     | 10/26 [00:01<00:02,  7.07it/s]
Copying train images for K:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for K:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for K:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  42%|█████████████████████████▊                                   | 11/26 [00:01<00:02,  6.95it/s]
Copying train images for L:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for L:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for L:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  46%|████████████████████████████▏                                | 12/26 [00:02<00:01,  7.25it/s]
Copying train images for M:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for M:  99%|████████████████████████████████████████████████████▏| 69/70 [00:00<00:00, 677.74it/s]
                                                                                                                       
Copying val images for M:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for M:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  50%|██████████████████████████████▌                              | 13/26 [00:02<00:01,  6.86it/s]
Copying train images for N:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for N:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for N:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  54%|████████████████████████████████▊                            | 14/26 [00:02<00:01,  6.80it/s]
Copying train images for O:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for O:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for O:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  58%|███████████████████████████████████▏                         | 15/26 [00:02<00:01,  6.76it/s]
Copying train images for P:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for P:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for P:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  62%|█████████████████████████████████████▌                       | 16/26 [00:02<00:01,  6.93it/s]
Copying train images for Q:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for Q:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for Q:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  65%|███████████████████████████████████████▉                     | 17/26 [00:02<00:01,  6.91it/s]
Copying train images for R:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for R:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for R:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  69%|██████████████████████████████████████████▏                  | 18/26 [00:02<00:01,  6.46it/s]
Copying train images for S:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for S:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for S:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  73%|████████████████████████████████████████████▌                | 19/26 [00:03<00:01,  6.52it/s]
Copying train images for T:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for T:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for T:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  77%|██████████████████████████████████████████████▉              | 20/26 [00:03<00:00,  6.77it/s]
Copying train images for U:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for U:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for U:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  81%|█████████████████████████████████████████████████▎           | 21/26 [00:03<00:00,  6.85it/s]
Copying train images for V:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for V:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for V:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  85%|███████████████████████████████████████████████████▌         | 22/26 [00:03<00:00,  6.85it/s]
Copying train images for W:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for W:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for W:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  88%|█████████████████████████████████████████████████████▉       | 23/26 [00:03<00:00,  6.83it/s]
Copying train images for X:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for X:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for X:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Copying test images for X:  87%|██████████████████████████████████████████████▊       | 13/15 [00:00<00:00, 127.17it/s]
Splitting uppercase:  92%|████████████████████████████████████████████████████████▎    | 24/26 [00:03<00:00,  5.48it/s]
Copying train images for Y:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for Y:  64%|██████████████████████████████████                   | 45/70 [00:00<00:00, 423.44it/s]
                                                                                                                       
Copying val images for Y:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for Y:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase:  96%|██████████████████████████████████████████████████████████▋  | 25/26 [00:04<00:00,  4.65it/s]
Copying train images for Z:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for Z:  94%|█████████████████████████████████████████████████▉   | 66/70 [00:00<00:00, 658.72it/s]
                                                                                                                       
Copying val images for Z:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for Z:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting uppercase: 100%|█████████████████████████████████████████████████████████████| 26/26 [00:04<00:00,  5.91it/s]
Splitting lowercase:   0%|                                                                      | 0/26 [00:00<?, ?it/s]
Copying train images for a:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for a:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for a:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:   4%|██▍                                                           | 1/26 [00:00<00:03,  7.04it/s]
Copying train images for b:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for b:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for b:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:   8%|████▊                                                         | 2/26 [00:00<00:03,  7.30it/s]
Copying train images for c:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for c:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for c:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  12%|███████▏                                                      | 3/26 [00:00<00:03,  6.97it/s]
Copying train images for d:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for d:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for d:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  15%|█████████▌                                                    | 4/26 [00:00<00:03,  6.80it/s]
Copying train images for e:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for e:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for e:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  19%|███████████▉                                                  | 5/26 [00:00<00:02,  7.12it/s]
Copying train images for f:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for f:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for f:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  23%|██████████████▎                                               | 6/26 [00:00<00:02,  7.30it/s]
Copying train images for g:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for g:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for g:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  27%|████████████████▋                                             | 7/26 [00:00<00:02,  7.53it/s]
Copying train images for h:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for h:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for h:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  31%|███████████████████                                           | 8/26 [00:01<00:02,  7.74it/s]
Copying train images for i:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for i:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for i:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  35%|█████████████████████▍                                        | 9/26 [00:01<00:02,  7.90it/s]
Copying train images for j:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for j:  39%|████████████████████▍                                | 27/70 [00:00<00:00, 155.78it/s]
Copying train images for j:  89%|██████████████████████████████████████████████▉      | 62/70 [00:00<00:00, 237.18it/s]
                                                                                                                       
Copying val images for j:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for j:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  38%|███████████████████████▍                                     | 10/26 [00:01<00:03,  4.62it/s]
Copying train images for k:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for k:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for k:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  42%|█████████████████████████▊                                   | 11/26 [00:01<00:03,  4.85it/s]
Copying train images for l:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for l:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for l:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  46%|████████████████████████████▏                                | 12/26 [00:01<00:02,  5.14it/s]
Copying train images for m:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for m:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for m:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  50%|██████████████████████████████▌                              | 13/26 [00:02<00:02,  5.24it/s]
Copying train images for n:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for n:  84%|████████████████████████████████████████████▋        | 59/70 [00:00<00:00, 549.58it/s]
                                                                                                                       
Copying val images for n:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for n:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  54%|████████████████████████████████▊                            | 14/26 [00:02<00:02,  5.21it/s]
Copying train images for o:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for o:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for o:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  58%|███████████████████████████████████▏                         | 15/26 [00:02<00:02,  5.43it/s]
Copying train images for p:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for p:  80%|██████████████████████████████████████████▍          | 56/70 [00:00<00:00, 550.46it/s]
                                                                                                                       
Copying val images for p:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for p:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  62%|█████████████████████████████████████▌                       | 16/26 [00:02<00:01,  5.15it/s]
Copying train images for q:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for q:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for q:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  65%|███████████████████████████████████████▉                     | 17/26 [00:02<00:01,  5.24it/s]
Copying train images for r:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for r:  90%|███████████████████████████████████████████████▋     | 63/70 [00:00<00:00, 623.84it/s]
                                                                                                                       
Copying val images for r:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for r:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  69%|██████████████████████████████████████████▏                  | 18/26 [00:03<00:01,  5.32it/s]
Copying train images for s:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for s:  87%|██████████████████████████████████████████████▏      | 61/70 [00:00<00:00, 593.76it/s]
                                                                                                                       
Copying val images for s:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for s:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  73%|████████████████████████████████████████████▌                | 19/26 [00:03<00:01,  4.85it/s]
Copying train images for t:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for t:  79%|█████████████████████████████████████████▋           | 55/70 [00:00<00:00, 533.49it/s]
                                                                                                                       
Copying val images for t:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for t:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  77%|██████████████████████████████████████████████▉              | 20/26 [00:03<00:01,  4.71it/s]
Copying train images for u:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for u:  97%|███████████████████████████████████████████████████▍ | 68/70 [00:00<00:00, 668.06it/s]
                                                                                                                       
Copying val images for u:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for u:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  81%|█████████████████████████████████████████████████▎           | 21/26 [00:03<00:01,  4.79it/s]
Copying train images for v:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for v:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for v:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  85%|███████████████████████████████████████████████████▌         | 22/26 [00:03<00:00,  5.20it/s]
Copying train images for w:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for w:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for w:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  88%|█████████████████████████████████████████████████████▉       | 23/26 [00:04<00:00,  4.95it/s]
Copying train images for x:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for x:  49%|█████████████████████████▋                           | 34/70 [00:00<00:00, 328.68it/s]
                                                                                                                       
Copying val images for x:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for x:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  92%|████████████████████████████████████████████████████████▎    | 24/26 [00:04<00:00,  4.38it/s]
Copying train images for y:   0%|                                                               | 0/70 [00:00<?, ?it/s]
Copying train images for y:  94%|█████████████████████████████████████████████████▉   | 66/70 [00:00<00:00, 642.28it/s]
                                                                                                                       
Copying val images for y:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for y:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase:  96%|██████████████████████████████████████████████████████████▋  | 25/26 [00:04<00:00,  4.51it/s]
Copying train images for z:   0%|                                                               | 0/70 [00:00<?, ?it/s]
                                                                                                                       
Copying val images for z:   0%|                                                                 | 0/15 [00:00<?, ?it/s]
                                                                                                                       
Copying test images for z:   0%|                                                                | 0/15 [00:00<?, ?it/s]
Splitting lowercase: 100%|█████████████████████████████████████████████████████████████| 26/26 [00:04<00:00,  5.36it/s]